*! version 5.0
* 13 August 2018
* NIDS
* Master Income do file updated for Nids Wave 5

* THIS IS THE 5th INCOME DO FILE - PERFORMING IMPUTATIONS FOR MISSING DATA: 5 OF 7
* THIS DO FILE PERFORMS IMPUTATIONS FOR MISSING DATA ON THE RELEVANT INCOME VARIABLES

*=====================================================================================================================================
* GLOBALS FOR DATA FILES, DO FILES AND VERSION SUFFIXES

* DEFINED IN "Master Income do file (1 of 7)"

* Interpret every command below under Stata version 12.1 rules
version 12.1

*=====================================================================================================================================

* OPENING DATASET PREPARED IN PRIOR DO FILE "Income - Preparing variables for imputation (3 of 7).DO"

* Switch off output paging so the do file runs through without pausing
set more off

* Load prepdata.dta from the folder named by the global DataOUT, discarding any data in memory
use "$DataOUT\prepdata.dta", clear

*-------------------------------------------------------------------------------------------------------------------------------------

*The regimp routine
cap program drop regimp
program define regimp
	* Log-linear regression imputation for one income variable.
	*
	* Usage:    regimp depvar regressors [if-qualifier]
	* Requires: a variable named <depvar>_rec already in the dataset.
	* Creates:  <depvar>_fit   exp of the fitted log value, for every row with a prediction
	*           <depvar>_imp   the survey value, or the fitted value where the survey value
	*                          is missing, <depvar>_rec is 1 and a fitted value exists
	*           <depvar>_data  0 Survey, 1 Imputed, 2 Not imputed (set only where <depvar>_rec is 1)
	*
	* Everything after the first token is handed to regress unchanged, so an
	* if-qualifier limits the estimation sample only; predict still fills all rows.

	* Split the argument list into the outcome name and the remainder
	gettoken outcome rhs : 0

	* Fit the regression on the natural log of the outcome
	tempvar logy logy_hat
	generate `logy' = ln(`outcome')
	xi: regress `logy' `rhs'

	* Back-transform the linear prediction to the level of the outcome
	predict `logy_hat'
	generate `outcome'_fit = exp(`logy_hat')

	* Keep the survey value unless it is missing, recorded and predictable
	generate `outcome'_imp = cond(`outcome'==. & `outcome'_rec==1 & `outcome'_fit!=., `outcome'_fit, `outcome')

	* Source flag: a non-missing survey value wins, then a usable fitted value,
	* otherwise the row stays unimputed
	generate `outcome'_data = cond(`outcome'!=., 0, cond(`outcome'_fit!=., 1, 2)) if `outcome'_rec==1

	label define `outcome'_data 0 "Survey" 1 "Imputed" 2 "Not imputed"
	label values `outcome'_data `outcome'_data
end

* Recode the value -3 to system missing (.) across all variables before imputing.
* NOTE(review): -3 is presumably the survey's missing-value code; confirm against the codebook.
mvdecode _all, mv(-3=.)

* RUNNING THE IMPUTATIONS

* Regressor list shared by every individual-level income regression.
* postgrad is not part of the list: in the original command it sat behind the
* line-continuation marker and was therefore treated as a comment.
local indiv_rhs male race_d coloured asian_indian white best_age_yrs age_d agesq tradeunion ///
	schooling schoolingsq cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours ///
	married homerooms homeroomssq homerooms_d i.hhintmonth i.province i.geo2011

foreach inc of varlist othe fwag cheq prof bonu extr cwag swag help ppen ///
	uif comp indi inhe rnt retr brid gift loan sale remt {

	* Number of rows with <inc>_d equal to 1
	* NOTE(review): <inc>_d presumably marks respondents who reported an amount; confirm.
	quietly count if `inc'_d==1

	if r(N)<100 {
		* Fewer than 100 such rows: no regression is run. The survey value is
		* carried over and recorded-but-missing rows are flagged as not imputed.
		generate `inc'_imp = `inc'
		generate `inc'_data = (`inc'_imp==.) if `inc'_rec==1
		label define `inc'_data 0 "Survey" 1 "Not imputed", modify
		label values `inc'_data `inc'_data
	}
	else {
		* Enough rows: impute by log-linear regression
		regimp `inc' `indiv_rhs'

		* When the mean of the 0/1/2 source flag exceeds 0.4 the regression
		* imputations are thrown away again and code 1 is relabelled accordingly
		quietly summarize `inc'_data
		if r(mean)>0.4 {
			replace `inc'_imp = . if `inc'_data==1
			label define `inc'_data 0 "Survey" 1 "Not imputed", modify
		}
	}
}


*----------------------------------------------------------------------------------------------------------------------------------------

* IMPUTATIONS FOR STATE OLD AGE PENSION

//Link :www.services.gov.za/services/content/Home/ServicesForPeople/Socialbenefits/oldagegrant/en_ZA

/*Respondents who do not give a figure for their old-age pension are assigned to the
maximum amount for the month before they were interviewed
Amount of the SOAP:
	- Before 1 April 2012: 1140
	- After  1 April 2012: 1200
	- After  1 April 2014: 1350 (R1370 if older than 75)
	- After  1 April 2015: 1410 (R1430 if older than 75)
	- 2017:	 R1600pm and R1620 if older than 75
*/
*this will be repeated for all other income sources
* Flat-rate amount for every row with spen_rec equal to 1:
* 1620 when age is 75 or more and not missing, 1600 otherwise
generate spen_imp = cond(best_age_yrs>=75 & best_age_yrs!=., 1620, 1600) if spen_rec==1

* spenimpute is 1 where an amount is available and the survey value is missing, 0 elsewhere
generate spenimpute = (spen_imp!=. & spen==.)
replace spen = spen_imp if spenimpute==1

*Imputation summary variable for state (RSA) old age pension
* An imputed row takes code 2 even when spen_d is 1; other rows with spen_d equal to 1 take code 1.
* Codes 3 and 4 are labelled below but are not assigned anywhere in this block.
generate spen_flg = cond(spenimpute==1, 2, cond(spen_d==1, 1, .))
label variable spen_flg "The old age state (RSA) pension data for this individual is:"
label define spen_flg 1 "Survey" 2 "Imputed" 3 "Not Imputed - Still Missing" 4 "Unit imputed"
label values spen_flg spen_flg

*----------------------------------------------------------------------------------------------------------------------------------------

* IMPUTATIONS FOR DISABILITY GRANT

//Link: http://www.services.gov.za/services/content/Home/ServicesForPeople/Socialbenefits/disabilitygrant/en_ZA

/*Respondents who do not give a figure for their disability grant are assigned the
maximum amount for the month before they were interviewed
Amount of the DG (same as for SOAP):
	- Before 1 April 2012: 1140
	- After  1 April 2012: 1200
	- After  1 April 2014: 1350
	- After  1 April 2015: 1410
	- 2017:	 R1600
*/
*Imputation of income from disability grants
* Flat-rate amount of 1600 for every row with dis_rec equal to 1
generate dis_imp = 1600 if dis_rec==1

* disimpute is 1 where an amount is available and the survey value is missing, 0 elsewhere
generate disimpute = (dis_imp!=. & dis==.)
replace dis = dis_imp if disimpute==1

*Imputation summary variable for disability grant
* Precedence, highest first: 3 (recorded but still missing), 2 (imputed), 1 (dis_d equal to 1)
generate dis_flg = cond(dis==. & dis_rec==1, 3, cond(disimpute==1, 2, cond(dis_d==1, 1, .)))
label variable dis_flg "The disability grant data for this individual is:"
label define dis_flg 1 "Survey" 2 "Imputed" 3 "Not imputed"
label values dis_flg dis_flg

*----------------------------------------------------------------------------------------------------------------------------------------

* IMPUTATIONS FOR CHILD SUPPORT GRANT

//Link: http://www.services.gov.za/services/content/Home/ServicesForPeople/Socialbenefits/childsupportgrant/en_ZA

/*Respondents who do not give a figure for their child support grant are assigned
the maximum amount for the month before they were interviewed, multiplied by the number
of biological children they have.
Amount of the CSG:
	- Before 1 April 2012: 260
	- After  1 April 2012: 280
	- After  1 April 2014: 310
	- After  1 April 2015: 330
	- 2017:	 R380
*/
*Imputation of income from CSG
* 380 per biological child for every row with chld_rec equal to 1.
* A missing biochildren passes the >0 test, but the product is then missing,
* so no amount is produced for those rows.
generate chld_imp = 380*biochildren if chld_rec==1 & biochildren>0

* chldimpute is 1 where an amount is available and the survey value is missing, 0 elsewhere
generate chldimpute = (chld_imp!=. & chld==.)
replace chld = chld_imp if chldimpute==1

* Rows with no grant amount, no child count and biochild equal to 0 are switched to not recorded.
* These people report no biological children and give no number for child grant.
* NOTE(review): biochild is referenced separately from biochildren; confirm it is a distinct indicator.
replace chld_rec = 0 if chld==. & biochildren==. & biochild==0

*Imputation summary variable for CSG
* Precedence, highest first: 3 (recorded but still missing), 2 (imputed), 1 (chld_d equal to 1)
generate chld_flg = cond(chld==. & chld_rec==1, 3, cond(chldimpute==1, 2, cond(chld_d==1, 1, .)))
label variable chld_flg "The child support grant data for this individual is:"
label define chld_flg 1 "Survey" 2 "Imputed" 3 "Not Imputed"
label values chld_flg chld_flg

*----------------------------------------------------------------------------------------------------------------------------------------

* IMPUTATIONS FOR FOSTER CARE GRANT

//http://www.services.gov.za/services/content/Home/ServicesForPeople/Socialbenefits/fosterchildgrant/en_ZA

/*Respondents who do not give a figure for their foster child grant are assigned
the maximum amount for the month before they were interviewed.
Amount of the foster care grant:
	- Before 1 April 2012: 740
	- After 1 April 2012: 770
	- After 1 April 2014: 830
	- After 1 April 2015: 860
	- 2017: R920
*/
*Imputation of income from foster care grants

* Flat-rate amount of 920 for every row with fost_rec equal to 1
generate fost_imp = 920 if fost_rec==1

* fostimpute is 1 where an amount is available and the survey value is missing, 0 elsewhere
generate fostimpute = (fost_imp!=. & fost==.)
replace fost = fost_imp if fostimpute==1

*Imputation summary variable for foster care grant
* Precedence, highest first: 3 (recorded but still missing), 2 (imputed), 1 (fost_d equal to 1)
generate fost_flg = cond(fost==. & fost_rec==1, 3, cond(fostimpute==1, 2, cond(fost_d==1, 1, .)))
label variable fost_flg "The foster care grant data for this individual is:"
label define fost_flg 1 "Survey" 2 "Imputed" 3 "Not imputed"
label values fost_flg fost_flg

*----------------------------------------------------------------------------------------------------------------------------------------

* IMPUTATIONS FOR CARE DEPENDENCY GRANT

//Link: http://www.services.gov.za/services/content/Home/ServicesForPeople/Socialbenefits/caredependencygrant/en_ZA

/*Respondents who do not give a figure for their care dependency grant are assigned
the maximum amount for the month before they were interviewed.
Amount of the care dependency grant (same as SOAP):
	- Before 1 April 2012: 1140
	- After  1 April 2012: 1200
	- After  1 April 2014: 1350
	- After  1 April 2015: 1410
	- 2017: R1600
	*/
*Imputation of income from care dependency grants

* Flat-rate amount of 1600 for every row with cdep_rec equal to 1
generate cdep_imp = 1600 if cdep_rec==1

* cdepimpute is 1 where an amount is available and the survey value is missing, 0 elsewhere
generate cdepimpute = (cdep_imp!=. & cdep==.)
replace cdep = cdep_imp if cdepimpute==1

*Imputation summary variable for care dependency grant
* Precedence, highest first: 3 (recorded but still missing), 2 (imputed), 1 (cdep_d equal to 1)
generate cdep_flg = cond(cdep==. & cdep_rec==1, 3, cond(cdepimpute==1, 2, cond(cdep_d==1, 1, .)))
label variable cdep_flg "The care dependency grant data for this individual is:"
label define cdep_flg 1 "Survey" 2 "Imputed" 3 "Not imputed"
label values cdep_flg cdep_flg

*----------------------------------------------------------------------------------------------------------------------------------------

* IMPUTATIONS FOR HOUSEHOLD-LEVEL VARIABLES

* Imputations for one-shot income and imputed rental income from owner-occupied housing
* Order rows by household so that repeated household ids sit next to each other
sort w5_hhid

* Regressor list shared by both household-level regressions
local hh_rhs i.hometype i.homewalls i.homeroof homerooms i.hhrace hhrace_d ///
	hhedu hhedusq i.hhintmonth hhage hhage_d hhtu i.geo2011

foreach hhvar of varlist hhq_incb rent_would {

	* Blank <hhvar>_d on every row after the first of a household, so the
	* count below sees each household at most once
	replace `hhvar'_d = . if w5_hhid==w5_hhid[_n-1]
	quietly count if `hhvar'_d==1

	if r(N)<100 {
		* Fewer than 100 such households: no regression is run. The survey value
		* is carried over and recorded-but-missing rows are flagged as not imputed.
		generate `hhvar'_imp = `hhvar'
		generate `hhvar'_data = (`hhvar'_imp==.) if `hhvar'_rec==1
		label define `hhvar'_data 0 "Survey" 1 "Not imputed", modify
		label values `hhvar'_data `hhvar'_data
	}
	else {
		* Estimate on the first row of each household only. The if-qualifier is
		* passed through regimp to regress; fitted values are still produced for all rows.
		regimp `hhvar' `hh_rhs' if w5_hhid!=w5_hhid[_n-1]

		* When the mean of the 0/1/2 source flag exceeds 0.5 the regression
		* imputations are thrown away again and code 1 is relabelled accordingly
		quietly summarize `hhvar'_data
		if r(mean)>0.5 {
			replace `hhvar'_imp = . if `hhvar'_data==1
			label define `hhvar'_data 0 "Survey" 1 "Not imputed", modify
		}
	}
}
			

* Copy the imputed rent_would series into imprent
gen imprent=rent_would_imp

* Write the dataset, including all imputation variables, to impdata.dta in the
* folder named by the global DataOUT, overwriting any existing file
save "$DataOUT\impdata.dta", replace

* end of do file
*========================================================================================================================================
